{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "0d9e9584-4f08-4b74-9879-e27be0ee2137",
   "metadata": {},
   "source": [
    "# 异常识别\n",
    "## 机器学习 - 聚类\n",
    "通过聚类将所有数据点框定在一定的聚类之中，不在聚类之中的点即为异常点"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "id": "6c73243e-97be-4016-8a5a-c799ea0a92bd",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.datasets import load_iris\n",
    "import numpy as np \n",
    "import pandas as pd\n",
    "\n",
    "# extract one cluster\n",
    "X,y = load_iris(return_X_y=True)\n",
    "data = []\n",
    "for x,y in zip(X,y):\n",
    "    if y==0 or y==1:\n",
    "        data.append(x)\n",
    "\n",
    "# add two abnormal data point\n",
    "data.append(np.zeros(4))\n",
    "data.append(np.ones(4))\n",
    "data = np.array(data,dtype=np.float64)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ad76aee4-e02e-4c7e-8932-21b120a903a5",
   "metadata": {},
   "source": [
    "### 原型聚类 k-means\n",
    "\n",
    "算法逻辑：聚类后，计算所有点到聚类中心距离，人为寻找距离超过平均距离的数据点作为异常值\n",
    "1. KMeans(n_clusters)\n",
    "2. kmeans.labels_\n",
    "3. kmeans.cluster_centers_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "id": "3af58333-15cb-461e-a2f2-065c00b9f18e",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.cluster import KMeans\n",
    "import numpy as np\n",
    "import warnings\n",
    "\n",
    "warnings.filterwarnings(\"ignore\")\n",
    "\n",
    "num_clusters = 2\n",
    "kmeans = KMeans(n_clusters=num_clusters)\n",
    "res = kmeans.fit(data)\n",
    "labels = res.labels_\n",
    "cluster_centers = res.cluster_centers_\n",
    "distances = [np.linalg.norm(x - cluster_centers[label]) for x,label in zip(data, labels)]\n",
    "anomalies = [data[i] for i,dis in enumerate(distances) if dis > np.mean(distances)+3*np.std(distances)]\n",
    "anomalies = np.array(anomalies,dtype=np.float64)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "id": "79454f93-d3ed-41ab-9176-93fdb5628604",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZEAAAGhCAYAAACgW4n6AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAAxOAAAMTgF/d4wjAAAkF0lEQVR4nO3de1RVZf7H8Y8EZKUYalcYFO+aOWSCRnnJK2qSllpeyCZTZ7lMarI0SwbHcdS0ctScdDU2imaryVXpypRRM7MUK7tpuURGUZo0FJW4NBDw+4OfZ1IPnM3D2exz4P1aiwWHs3n2dx/IT/vs5/vsevn5+WUCAMBAgNMFAAD8FyECADBGiAAAjBEiAABjhAgAwBghAgAwRogAAIwRIqiSlJQUTZw40e1zo0aN0t69e2u4our59NNPFR0drUaNGqlLly4V1l/ZcVfFW2+9pWnTplV7HCueeuoptWrVSpGRkfrkk0+8Pv6uXbsUFxfn9XHhXwKdLgC1x/r1650uoUoKCws1ZswYzZo1S8OGDdNLL72kSZMm6auvvrJtn8OHD9fw4cNtG/+Cb775Rlu2bNGhQ4dUVlamwsJC2/eJuokzEdRZubm5evHFF5WQkKAGDRro/vvvV2ZmptNlecXZs2cVFhamwMBABQUFKSQkxOmSUEsRIn7gnXfeUVRUlFq2bKmBAwfq5MmTkqS5c+fqmWee0dixYxUREaGYmBj9+9//liTl5+drypQpatWqlX7729/qrbfecv3MsGHDdNttt6lPnz6aNm2aIiIitGTJEknS7t271bVrV7Vs2VLdu3fXoUOHLNcZFxenXbt2XfS9ffv2qUePHoqIiFDfvn2Vnp4uSSorK9Ozzz6rVq1aqVmzZpowYYKKi4s97uOLL77Q3XffrcjISMXHx7v+0U9JSdH48eOVmJioZs2aqWPHjvrss88qHeuGG27QPffc43q8efNm3Xnnna7H8+bNU2RkpKKjo7Vnzx6PtX3//fcKCwtTUVGRJCkvL0833nijzpw549rG3dtiubm5mjRpklq2bKm2bdtqzZo1kqT4+Hi9+eab2rRpkxo3bqz//ve/evjhh7VixYoKaygpKVFkZKRGjx6tvXv3KjIyUrGxsa7nd+zYoZiYGLVo0UIJCQk6ffq067n27dtr3759Gjt2rNq0aePxeH/tyy+/VIcOHfTpp58qMzNTkZGRGjt2rJo3b6758+erQ4cO6tmzp0pKSio8Xqniv7/Kfr9FRUWaOHGiWrZsqWbNmunZZ5+tUu2oHkLEx/3yyy96+eWX9corrygjI0MdOnTQyy+/7Hr+1Vdf1YMPPqjjx4/r9ttv1yuvvCJJmjlzps6dO6eDBw/qn//8p5588kl9+umnksrfxtm7d6+OHDmijh07avny5dqyZYsk6eWXX9aMGTOUkZGh4cOHa968eca1nz17VqNHj9YLL7ygY8eOaeDAgXriiSckSV9//bVeffVVffbZZzp8+LACAgL0zTffVDre+fPndf/99ysxMVFHjx5V7969NXLkSJWUlEiS3n77bXXo0EHHjh3TmDFjtGjRIsu1Hj9+XIsXL1ZSUpIkacuWLXrttdf0ySef6F//+pf27dvncYywsDBFRUVp586dkqTU1FT16tVLTZo0qfTnpk+friZNmujQoUPavHmzZsyYoePHjysmJkbfffedDhw4oN69e+u7777ToUOHFB0dXeFYV1xxhY4eParXX39d3bp109GjR13XQzIzM5WQkKClS5fqyJEjuv766zVp0qSLfn7KlCkaOXKkpdC84JtvvtHYsWO1evVqV205OTmaNWuWRo4cqZ07d+qLL77QsWPH9MMPP1R4vFLlf38V/X63bt2qPXv26Ntvv9WBAweUkZGhrKwsy/Wjergm4uMCAwO1Zs0abdq0SatWrVJqaqr69+/ven7AgAGu/5u+4447tHv3bknlZy8bNmzQlVdeqTZt2mjo0KF69913ddVVV+nOO+/UlVdeqauvvlo9e/bUiRMnVFpaKklavny5Nm7cqClTpmjHjh2KiIgwrj0tLU3Z2dkaOXKkJKm0tFTBwcGSpMjISIWGhmratGnq3r27kpKSFBYWVul4e/bsUWhoqIYOHSpJSkxM1Ny5c11nN7fccovrH8XY2Fh9+OGHluosKirSuHHj9Oijj6pr166SpJ07d2rIkCG66aabJEkJCQk6ePCgx7FGjRqlTZs2qX///tq4caNGjRrl8Wc2b96ssrIy1zWl0tJSHTx4UNHR0Vq1apWCg4MVHx+vr776SidOnNCtt95q6bgutXXrVsXExLiO8emnn1bLli2Vn5+va665RlL5axofH295zKysLD344IMKCAhQx44dXd+/8cYb1bZtWzVq1Oiiv7eSkpIKjzciIqLSv7+Kfr+dOnVSbm6upk+frq5du2rZsmVq2rSp0WuEquNMxMdlZmYqOjpaubm5SkhI0HPPPXfR823btnV9Xa9evYueu/TxBQEBAW6/zs3NVXR0tI4cOaJhw4ZV6f/k3SkrK1Pr1q119OhR18eF/8MNCQnR/v37NWrUKB0/flyxsbH6+OOPPY5Z0TFJlb8WldU4ZcoUhYSEaNasWa7vl5aWXjTGFVdcYWm8oUOH6oMPPtDPP/+sffv2aeDAgZZ+7u2333a9RocPH9bdd9+tmJgYHTp0SOfOndPtt9+uTZs2qV27dgoKCrI05qXKyso8vi533XVXlcY8ffq0Vq1apR49elz091LR39gF7o7X099fRb/fZs2a6euvv9aAAQP01VdfqXPnzsrIyKjSccAcIeLjvvzySzVo0ECPPfaYWrRooTfffPOi5939ByqVv5/+17/+VUVFRUpPT9c777yjIUOGVLqvjIwM5eTk6Mknn1RUVJRef/31atXetWtXZWdnKzU1VZK0du1a9e7dW9L//k+/a9eumjlzptq3b+/xGsYdd9yhM2fOaNOmTZKkpUuXqnnz5mrdurWkil+LyiQlJWn//v1as2bNRUFx1113afPmzcrOzlZubq7WrVtnabwGDRooJiZGCxYsUL9+/VxnXpWJi4vTK6+8ouLiYp06dUqdOnXS/v37FRoaqoCAAAUFBal169batm2bunTpUuVjvKB///5KS0vTp59+qtLSUi1atEj9+vVznYWYiIqKUteuXfXHP/5Rf//7313X5CpT0fF6+vur6PebkpKiyZMnq3fv3kpOTta1117r8a1ReA8h4uN69+6tdu3aqWXLlrr33nt1yy236PDhwx5/bt68eWrQoIHat2+v+++/X88//7zrbYyK3HrrrRo2bJg6duyoXr16qVmzZjp69KilC97uNG7cWG+88YbmzJmjFi1aaNWqVUpJSZEk9ejRQ7fffrs6deqkFi1aKDg4WKNHj650vEaNGmnDhg1atGiRmjdvrtTUVL355puWzxIudf78eb300kvKyspSVFSUIiMjFRkZqczMTMXHx2vkyJGKiYlR9+7d1a5dO8vjjho1Si+88IKlt7IkacGCBSorK1P79u3Vq1cvPfXUU66L4dHR0Wrbtq2Cg4PVvHnzSq+HeBIZGal//OMfmjx5slq1aqX//Oc/WrlypfF4v3b99dcrMTFRTz75pMdtKzpe07+/4cOHq379+mrbtq3atm2r2267TQMGDPDKccGzetyUCgBgijMRAIAxQgQAYIwQAQAYI0QAAMYIEQCAMUIEAGDMlmVPQkNDdd1119kxNACgBv344486d+5chc/bEiLXXXcdC6ABQC3gaU073s4CABirsVV8y8rKXB+oefXq1XN9AIC31EiInD59WmfOnHEtNw5nBAQEqEmTJiyTDcBrbA+R0tJSZWdnKzw8XFdffbXdu0MlCgoKlJWVpcaNGxuteAsAl7I9RC68fXX11Vcbr7YK77gQ4rylCMBb+N/RSgwfPtwr23jD4sWLLS0BDwA1iRCxSXJysg4cOOC18R5//HG1adOmwudrKswA4Nd87h7rx49LJrf1fvjhhxUZGan9+/dr3rx5Onr0qFJSUhQUFKRXX31V8+bNU0FBgcrKyjR48GAtWLBAubm5uv7665WUlKSUlBSdPn1aHTp00IwZMyrdV2FhocaMGaMmTZro559/VmFhoUaPHq3AwEBNnDhRJSUl2rZtm7KysjR+/HhFRUVd9Hy/fv0uGi85OVkZGRkqKSnR3LlzdebMGS1YsEABAQFatmyZrrvuOiUnJ2v48OHq2LGj4uPj1a1bN3344Ydat26dFi1apAMHDmjOnDmaNWuW1q9fr82bN6tevXpas2ZN1V9MALDIZ85EioulpCSpWbPyzyY30xsxYoQefvhhffbZZ1q1apXWr1+vwYMH691335VUfme3hQsXSpL69Omj0NBQjRkzRhkZGRo8eLDuuusu161cK5Oamqp7773XdfvZsrIyjR8/Xu3atdOWLVsUFxenvn376vHHH9cdd9xx2fPu/P73v9f06dO1bt06LV68WGvWrNETTzyh11577bJtCwoKNGXKFMXFxenbb7/V/Pnz1bFjR9c9wjMzM9WtWzdLd5kDgOrwmRApLJTmzCn/es6c8sdVFRERoaCgIJWWll7UD3Hh6+7du7u+V79+fdWvX///912o559/XlFRUbrxxhs97qesrEyBgYEKDAxUQECA9u3bp02bNmnQoEEqKSmRJF1xxRWuKc3unr9UUVGRiouLL5s15a6v49prr1VISIjrWC81ZMgQde7cWTNnzlRubq7H4wEAUz7zdlZIiNS7t7RjR/nnkJDqjfe73/1Oo0aNcr2ddfDgwQq3DQ4OVnBwsFavXq1jx455HLtfv36aOHGi0tLSFBAQoMaNGys7O1urVq3SL7/8Iknq27evnnvuOfXr1089e/a87PlLrVy5Uj/99JOWLVum06dP66GHHlJAQIBefvllS8fbuHFjPfTQQ5o3b5727t2rvXv3qn79+rrqqqss/TwAmLDlHutt2rRxrZ1VUlKiw4cPq02bNkzxrcCvr3fYid8FgKoKCwtTenp6hc/7zJlIXZacnOx0CUCtZjphB575zDURAPA2b0zYQeUIEQC1ljcm7KByhAiAWuvChB3JOxN2cLk6GyIPP/yw8vLyXI+nTZumgoICBysCYIft26WysvLP8D7fC5Hjx20Z1tOyIIsWLWKVYQCoIt8JkWpeAfvss880YsQIPfDAA+rTp48kadmyZdq2bZtmzJjhWhakIr8+M1mxYoUmTZqkiRMnup6bPXu2nn/+eUnS9OnTNWHCBP3lL38xOVIAqDV8Z4rvpVfApk2TgoIs//iFpUK++uorxcbGur4fGBio+fPn68iRI65lQTxZuXKl+vXrp/T0dFfHd0xMjAYOHChJOnnypIYOHaqoqCjL9QFAbeQ7ZyJevAJ2oUs7MzPT6OfDwsI0f/58jRs3ztWU9+slU6ZOnaorr7xS06dPN64RAGoD3zkTkap15evxxx93LRUyY8YMTZ8+Xd9++60GDx4s6eJlQcLCwioda+DAgRo9erQaNmyo+Pj4y55fv369cnNz1bx5c+N6AaA2YNmTOoTfBYCq8rTsie+8nQUA8Du2h8iFpcy5r7fzLvwO3C0vDwAmbL8mEhAQoKCgIJ05c0ZNmjThHzCHlJWV6cyZMwoKCrrsniUAYKpGLqxHRETo+PHjysnJqYndoQJBQUGKYClTAF5UIyESHBysVq1aqbS0lLe1HFKvXj3OQAB4XY1O8eUfMQDexH1CnMe/6gD8DvcJ8R2ECAC/w31CfAchAsDvcJ8Q3+Fby54AgEXcH8Q3cCYCADBGiAAAjBEiABxl081MUUMIEQCOYJpu7UCIAHAE03RrB0IEgCOYpls7MMUXgGOYpuv/OBMBABgjRAAAxqoUIuPGjVNKSopdtQAA/IzlENmwYYPee+89O2sBgBpBb4r3WAqRkydPavHixXr00UftrgcAbENvivdZCpHExEQtWLBADRs2dPv8kiVL1Lp1a9dHXl6eV4sEAG+gN8X7PIbI6tWr1bZtW8XGxla4zdSpU5Wenu76aNCggVeLBABvoDfF+zz2iWzcuFHnz59XXFycMjMzVb9+fTVq1Ejx8fE1UR8AeBW9Kd7lMUQ2bNjg+nru3LmKiIggQAAAkqrYsf7ss8/aVQcAwA/RbAgAMEaIAPALe/Y4XQHcIUQA+LSCAqlHDyk2tvxzQYHTFeHXCBEAPi0nR/roo/KvP/qo/DF8ByECwKeFh0uhoeVfh4aWP4bv4H4iAHweZx++izMRAIAxQgQAYIwQAVCj3n675vfJ0u/2IUQA1Ijz56WICOm++8o/nz9v/z5Z+t1+hAiAGvGf/0gnTpR/feJE+WO7sfS7/QgRADWifXsp8P/ngwYGlj+2G0u/248pvgBqjBNvJ7H0u704EwEAGCNEAADGCBEAgDFCBECNstKz4W4bej18EyECoEZY6dlwtw29Hr6tXn5+fpm3B23Tpo2ysrK8PSwAP5abKzVq9L/H589fPuXW3TaS55+DfcLCwpSenl7h85yJAKgRVno23G1Dr4dvo08EQI2x0rPhbht6PXwXZyIAAGOECADAGCECADBGiABwqU4vBr0ddRMhAqBavRj0dtRt9IkAsNTDUZWflejtqC3oEwHgUXV6MejtqNvoEwEgqXq9GPR21F2ciQAAjBEiAABjhAgA2+zZ4/l73p5WjJpFiADwuoICqUcPKTa2/HNBweXfO3/eu9OK4Qym+ALwuqws6Te/+d/jEyfKP//6e99+K3Xo8L/H1Z1WzAwwezDFF0CNCw+XQkPLvw4NLX986ffat/futGI4gym+AGyRk+P5e96eVoyax5kIAMAYIQIAMEaIAACMESIAXKz2Xbjr/7C63aX78PY+vYk+FM8IEQCW+y7c9X9Y3e7SfRQUeHef3kQfinX0iQCw3Hfhrv8jPNzadiEhF+/jxImLt6nuPr2JPpT/oU8EgEdW+y7c9X9Y3e7SfYSHe3ef3kQfinWciQAAKsSZCADANoQIAMAYIQLANu6myNaVabN15TgJEQBe526KbF2ZNltXjvMCLqwD8Dp3U2SlujFttrZND+bCOoAa526KbF2ZNltXjvMCloIHYAt3S7XXleXb68pxSpyJAACqgRABABgjRAAAxggRoI6w0rfgbpvqLMHuT70SdtfqxGtRE/skRIBazkrfgrttqrMEuz/1SthdqxOvRU3ukz4RoJaz0rfgbpvcXPMl2P2pV8LuWp14Lby5T/pEgDrOSt+Cu22qswS7P/VK2F2rE69FTe6TMxEAQIU4EwEA2IYQAQAYI0QAAMYIEQAuVvsKvN1PgspZfW2d6EUhRABY7ivwdj8JKmf1tXWyL4fZWQAs9xV4u58ElcvKsvba2tmLwuwsAB5Z7Svwdj8JKmf1tXWyL4czEQBAhbxyJpKfn68dO3bo4MGDXisMAOD/PIZIYWGh7rnnHqWlpenpp5/W8uXLa6IuAIAf8BgiBw4c0FNPPaVnnnlGCxYs0ObNm2uiLgDV4G6qpzenf7qbcmr3Pq2OZbU20/GdWDLel5fU9xgi0dHRGjRokA4fPqyFCxdq1KhRNVEXAAPupnp6c/qnuymndu/T6lhWazMd34kl4/1hSX3LF9aXL1+u1atXa86cOerfv/9Fzy1ZskRLly51Pc7Pz9e5c+e8WigAz9xN9ZS8N/3T3ZTTkBB792l1+qrV2qwsg291erPdS8ZLzi+p77UpvpMnT9batWs1a9asy56bOnWq0tPTXR8NGjQwqxZAtbib6unN6Z/uppzavU+rY1mtzXR8J5aM94cl9T2eiaxYsUKnTp1SUlKS9u3bp6SkJG3ZsqXSQZniCwC1g6czkUBPAyQkJGj8+PHq06ePgoOD9eKLL3q1QACA//IYIldffbXWr19fE7UAAPwMy54AAIwRIkAd4cu9Bt7kzT4Lq2P5Ux+KtxEiQC3nD70G3uDNPgurY/lTH4pdWIARqOXs7m/wFd7ss7A6lpXxfaUPxRRLwQN1nD/0GniDN/ssrI7lT30oduFMBABQIc5EAAC2IUQAAMYIEQCAMUIEQK3n7h4jvjBWdXpCfKWfhBABUGu5u8eIL4xVnZ4QX+snYXYWgFrL3T1GwsOdH6s6PSE13U/C7CwAdZa7e4z4wljV6QnxtX4Sj6v4AoA/y8nxzbG2b3fmZ72NMxEAgDFCBABgjBAB4ChvT1W1Mp7Vabq+Mo3WlxEiABzh7amqVsazOk3X16bR+jKm+AJwhLenqloZz+o0XV9dlt0JTPEF4JO8PVXVynhWp+n62jRaX8aZCACgQpyJAABsQ4gAAIwRIgAAY4QIAL9ltY/j0u3o//AeQgSA37Hax3HpdgUF9H94G7OzAPgdq30cl2534sTFfSJ1uf/DKmZnAah1rPZxXLpdeDj9H97GUvAA/JLV5dAv3c6XllGvDTgTAQAYI0QAAMYIEQCAMUIEQI3yZo+Gu7F8pQfEV+qwGyECoEZ48x4d7sbylXuA+EodNYU+EQA1wpv36HA3luQb9wCpbfcioU8EgE/w5j063I3lK/cA8ZU6agpnIgCACnEmAgCwDSECADBGiADwCiemtO7ZU/P7xMUIEQDV4sSU1oICqUcPKTa2/HNBgf37hHtcWAdQLU5Mac3KunhJ9xMnylfohfdxYR2ArZyY0hoeLoWGln8dGkqAOIml4AFUmxPLq+fk1Pw+cTnORAAAxggRAIAxQgQAYIwQAeowu3s7/H386vDl2ryJEAHqILt7O/x9/Orw5drsQJ8IUAfZ3dvh7+NXhy/XZoI+EQCXsbu3w9/Hrw5frs0OnIkAACrEmQgAwDaECADAGCECABWwOk23rkzndYcQAYBLWJ2mW9em87rDhXUAuITVabq1bTqvO1xYB4AqsjpNt65N53WHpeABwA2ry9s7sQy+L+FMBABgjBABABgjRAAAxggRAIAxQgQAYIwQAQAYI0QAAMYIEQCAMUIEAGDMY4jk5eVpxIgRGjJkiLp3767PP/+8JuoCAPgBjyGyfv16jRgxQps2bVJycrJmz55dE3UBAPyAx7WzJkyY4Pr61KlTuummm2wtCEDtcfy4FBHhdBWwk+VrItnZ2Vq4cKGmT59uZz0AagHus1F3WLqfSFFRke69915NmDBB991332XPL1myREuXLnU9zs/P17lz57xaKAD/URfus1FXeLqfiMcQKSkp0dixY9WtWzclJiZa2ik3pQLQp4+0Y0f5fTbq+nLp/sxTiHi8JrJ69Wpt3bpVZ8+e1fvvv6+mTZtq7dq1Xi0SQO1DcNQN3B4XAFAhbo8LALANIQIAMEaIAACMESIAAGOECADAGCECADBGiAAAjBEiAABjhAgAwBghAgAwRogAAIwRIgAAY4QIAMAYIQIAMEaIAACMESIAAGOECADAGCECADBGiAAAjBEiAABjhAgAwBghAgAwRogAAIwRIgAAY4QIAMAYIQIAMEaIAACMESIAAGOECADAGCECADBGiAAAjBEiAABjhAgAwBghAgAwRogAAIwRIgAAY4QIAMAYIQIAMEaIAACMESIAAGOECADAGCECADBGiAAAjBEiAABjhAgAwBghAgAwRogAAIwRIgAAY4QIAMAYIQIAMEaIAACMESIAAGOECADAGCECADBGiAAAjBEiAABjhAgAwBghAgAwRogAAIwRIgAAY4QIAMAYIQIAMEaIAACMESIAAGOECADAGCECADBGiAAAjBEiAABjhAgAwJjlECkuLtawYcO0a9cuO+sBAPgRSyFSVFSk4cOH68SJE3bXU2t8v+e40yUAgO0sn4ksX75cnTt3trOWWqG4oFg7eyQpLLaZdvZIUnFBsdMlAYBtLIVIcHCwwsLCKnx+yZIlat26tesjLy/PawX6m8KcQvX6aI4kqddHc1SYU+hwRQBgH69cWJ86darS09NdHw0aNPDGsH4pJDxE+0N7S5L2h/ZWSHiIwxUBgH0CnS6gNuqcs738s8N1AIDdmOILADBWpTORlStX2lUHAMAPcSYCADBGiAAAjBEiAABjhAgAwBghAgAwRogAAIwRIgAAY4QIAMAYIQIAMEaIAACMESIAAGOECADAGCECADBGiAAAjBEiAABjhAgAwBghAgAwRogAAIwRIgAAY4QIAMAYIQIAMEaIAACMESIAAGOECADAGCECADBGiAAAjBEiAABjhAgAwBghAgAwRogAAIwRIgAAY4QIAMAYIQIAMEaIAACMESIAAGOECADAGCECADBGiAAAjBEiAABjhAgAwBghAgAwRogAAIwRIgAAY4QIAMAYIQIAMEaIAACMESIAAGOECADAGCECADBGiAAAjBEiAABjhAgAwBghAgAwRogAAIwRIgAAY4QIAMAYIQIAMEaIAACMESIAAGOECADAGCECADBGiAAAjBEiAABjhAgAwBghAgAwRogAAIwRIgAAY4QIAMAYIQIAMGYpRJKTkxUbG6thw4YpOzvb7pokSd/vOV4j+wEAmPMYIlu3blVaWpp2796txMREzZ4929aCiguKtbNHksJim2lnjyQVFxTbuj8AgDmPIbJt2zY98MADCggIUM+ePZWWlnbZNkuWLFHr1q1dH3l5ecYFFeYUqtdHcyRJvT6ao8KcQuOxAAD28hgieXl5Cg8PlyTVq1dPBQUFl20zdepUpaenuz4aNGhgXFBIeIj2h/aWJO0P7a2Q8BDjsQAA9gr0tEHDhg0vCo6ffvrJ1oIkqXPO9vLPtu8JAFAdHs9EunXrpp07d0qSDh8+rKZNm9pdEwDAT3gMkUGDBmnfvn36wx/+oISEBE2ZMqUm6gIA+AGPb2fVr19f27ZtU2pqqkaPHq0uXbrURF0AAD/gMUSk8iCJj4+3uxYAgJ+hYx0AYIwQAQAYI0QAAMYIEQCAMUIEAGCMEAEAGCNEAADGCBEAgDFCBABgjBABABiztOxJVf34448KCwur1hh5eXnVui+J0/y9fsn/j4H6nefvx0D98nhL9Hr5+fll1dqDTVq3bq309HSnyzDm7/VL/n8M1O88fz8G6veMt7MAAMYIEQCAMZ8Nkccee8zpEqrF3+uX/P8YqN95/n4M1O+Zz14TAQD4Pp89EwEA+D5CBLVSfn6+duzYoYMHDzpdClCrESI2KS4u1rBhw7Rr1y6nS6mSvLw8jRgxQkOGDFH37t31+eefO11SlRUWFuqee+5RWlqann76aS1fvtzpkoyNGzdOKSkpTpdRZbfeeqvi4uIUFxen2bNnO12OsZSUFE2ePNnpMqps9erVrtc/Li5OTZs21Q8//GDLvmxpNqyu5ORkpaam6oYbbtDKlSt13XXXOV1SlRQVFWnEiBH6/vvvnS6lytavX68RI0Zo5MiR2r59u2bPnq2NGzc6XVaVHDhwQE899ZQGDRqkAwcOaMaMGX75D8GGDRv03nvvqW/fvk6XUiUZGRnq1KmT1q1b53Qp1XL06FEtWbJE27dvd7qUKhs3bpzGjRsnSUpLS9Mrr7yim266yZZ9+dyZyNatW5WWlqbdu3crMTHRb/8vZvny5ercubPTZVTZhAkTNHLkSEnSqVOnbPvDs1N0dLQGDRqkw4cPa+HChRo1apTTJVXZyZMntXjxYj366KNOl1Jlu3fv1pdffqk+ffro7rvv9suz2dLSUo0fP14dOnTQ66+/rpycHKdLMvbHP/5Rc+bMsW18nwuRbdu26YEHHlBAQIB69uyptLQ0p0uqsuDg4Gov++K07OxsLVy4UNOnT3e6FGPbtm3ToUOH/O5MVpISExO1YMECNWzY0OlSqqxTp07auHGjtm/fruTkZM2cOdPpkqrsjTfeUGlpqf785z+rffv2iouLU2lpqdNlVdkHH3ygVq1aKTw83LZ9+FyI5OXluQ64Xr16KigocLiiuqeoqEgPPfSQZs2apRYtWjhdjrHJkydr7dq1mjVrltOlVMnq1avVtm1bxcbGOl2Kkfbt26tly5aSpKioKB06dMjhiqru888/1yOPPKLf/OY36tmzp+rXr6+MjAyny6qyv/3tb7afzfrcNZGGDRteFBw//fSTg9XUPSUlJRo3bpzi4uJ03333OV2OkRUrVujUqVNKSkrS2bNnFRoa6nRJVbJx40adP39ecXFxyszMVP369dWoUSPFx8c7XZolkyZN0tixY9WvXz9t2LDBL9/WbdeunWvNqezsbGVlZenmm292uKqqyc7O1tGjRxUVFWXrfnwuRLp166adO3cqPj5ehw8fVtOmTZ0uqU5ZvXq1tm7dqrNnz+r9999X06ZNtXbtWqfLqpKEhASNHz9effr0UXBwsF588UWnS6qSDRs2uL6eO3euIiIi/CZAJGnWrFkaN26cZs6cqbCwMC1ZssTpkqosISFBiYmJGjBggLKzs/WnP/1J11xzjdNlVUlqaqp69uxp+358rmP9559/Vt++fRUTE6OPP/5YkyZN0iOPPOJ0WQAAN3wuRKTyIElNTdXNN9+sLl26OF0OAKACPhkiAAD/4HOzswAA/oMQAQAYI0QAAMYIEQCAMUIEAGCMEAEAGPs/LB86fxvS+fkAAAAASUVORK5CYII=",
      "text/plain": [
       "<Figure size 480x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib as mpl\n",
    "from matplotlib import pyplot as plt\n",
    "\n",
    "plt.rcParams[\"font.sans-serif\"] = \"SimHei\"\n",
    "plt.rcParams[\"axes.unicode_minus\"] = False\n",
    "\n",
    "fig = plt.figure(figsize=(6,6),dpi=80,facecolor=\"whitesmoke\", edgecolor=\"grey\")\n",
    "ax = fig.add_subplot(1,1,1)\n",
    "ax.scatter(data[:,0],data[:,1],marker='*',s=4.5,color='blue',label=\"normal data points\")\n",
    "ax.scatter(anomalies[:,0],anomalies[:,1],marker='*',s=4.5,color='red',label=\"outliers\")\n",
    "ax.set_title(\"anomalies on 2d view for kmeans\")\n",
    "ax.legend(loc=\"upper left\", fontsize=7)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ee3934f5-c530-4dc6-ae70-5d0a03a1c8d4",
   "metadata": {},
   "source": [
    "### 密度聚类 - DBSCAN\n",
    "\n",
    "1. DBSCAN(eps,min_samples) - eps表示邻域半径， min_samples表示一个连接中的最小样本数\n",
    "2. dbscan.fit_predict(data) 将会直接将聚类结果预测出来,那些标记为-1的数据点表明是无法连接到的点，在此即为离群点"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "id": "e1728bd1-fe4c-4537-9ec6-a52c403ddb6e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[array([0., 0., 0., 0.]), array([1., 1., 1., 1.])]"
      ]
     },
     "execution_count": 113,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.cluster import DBSCAN\n",
    "import numpy as np\n",
    "\n",
    "eps = 0.8\n",
    "min_samples = 3\n",
    "dbscan = DBSCAN(eps=eps, min_samples=min_samples,leaf_size=15)\n",
    "labels = dbscan.fit_predict(data)\n",
    "anomalies = [data[i] for i,label in enumerate(labels) if label == -1]\n",
    "anomalies"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "eb3d4b63-f948-41f3-a2e7-aa550848efaf",
   "metadata": {},
   "source": [
    "## 机器学习 - 孤立森林 \n",
    "\n",
    "1. IsolationForest() 和 RandomForest() 的参数集是相通的\n",
    "2. 返回值即为不能与其他数据联通的点集"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 145,
   "id": "b5b92048-a77e-4ea9-b85c-b200cac87026",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.ensemble import IsolationForest\n",
    "import numpy as np\n",
    "\n",
    "isof = IsolationForest(n_estimators = 500)\n",
    "isof.fit(data)\n",
    "res = isof.predict(data)\n",
    "anomalies = [data[i] for i,p in enumerate(res) if p == -1]\n",
    "anomalies = np.array(anomalies,dtype=np.float64)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 141,
   "id": "4eed1b1d-ebe8-41b6-9709-26bc5ebd1b9e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZEAAAGhCAYAAACgW4n6AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAAxOAAAMTgF/d4wjAAAo3ElEQVR4nO3de1zVdZ7H8bekxDSI4WVmDAIBFXHMzBSRyUuoE5pS5m28oDam7vowadYLjo0srrXmrRwtR51qVjFt2pwm3XGUxXK0UsjcLHVMdBCk2YxEJS6NjLB/sJ5JPXB+fDlXeD0fDx5w+P3O9/c5R/DN75zv5/trVlZWVi0AAAz4eboAAIDvIkQAAMYIEQCAMUIEAGCMEAEAGCNEAADGCBEAgDFCxEdkZGRoxowZdreNHz9ehw8fdnNFDfPhhx+qd+/eatWqlXr16lVr/XU97vp48803NW/evAaPY8X8+fPVsWNHRURE6IMPPnDZcQ4fPqzx48c7bbz8/HzFxMQ4bbzCwkL169fPaePV5eOPP1ZsbKw6dOign/zkJ245Jmo093QBaLjt27d7uoR6qaio0MSJE7V48WKNHDlSL7zwgmbOnKljx4657JijR4/W6NGjXTb+dZ9++qn27NmjU6dOqbq6WhUVFS47VlxcnOLi4lw2vhXHjh1TQUGBRowYccu20NBQHTx40C11LF++XLNnz9bkyZP11VdfueWYN9u1a5fCwsJ07733euT4nsKZCNyupKREzz//vJKTkxUYGKhRo0YpPz/f02U5xaVLlxQSEqLmzZurRYsWCgoK8nRJLvXJJ59o165dni5Dly5dUocOHSRJbdu29UgNu3bt0ieffOKRY3sSIVKH3//+9+rRo4eioqI0dOhQffHFF5KkZ599Vj//+c81adIkhYWFKTY2Vn/5y18kSWVlZZo9e7Y6duyoe++9V2+++abtPiNHjtR9992nQYMGad68eQoLC9PatWslSe+995769OmjqKgo9evXT6dOnbJcZ2Jiog4cOHDD93JyctS/f3+FhYVp8ODBys3NlSRVV1fr6aefVseOHRUeHq7p06ersrLS4TH+53/+Rw8++KAiIiKUlJRk+08/IyND06ZNU0pKisLDw9WtWzcdOXKkzrG+//3va/jw4bbbu3fv1o9+9CPb7WXLlikiIkK9e/fWoUOHHNb2+eefKyQkRFevXpUklZaW6gc/+IEuXrxo28fey2IlJSWaOXOmoqKiFB0drS1btkiSkpKS9MYbb2jXrl1q3bq1/va3v2nq1KnauHFjrTVcu3ZNERERmjBhgg4fPqyIiAjFx8fbtr/zzjuKjY1VZGSkkpOTb/hrOSYmRjk5OZo0aZI6d+7s8PFed+DAASUmJt7wvatXr2rGjBmKiopSeHi4nn76adu2oqIiTZo0SZGRkYqNjdX+/fstHeeZZ55Rp06d1LFjR/3zP/+zqqqqlJ+fr4iICC1YsEA7duxQRESExo4de8P9ant5bN26deratauio6OVnp6ua9euSar5OX7llVfUv39/3X333Ro1apTt37Q2q1evVkREhA4fPqwJEyYoIiJCv/3tbyXV/rso/ePnISMjQ/fee69+9atf2bZt2bJF3bt3V3h4uKZOnaqysjJJ0uXLlzVmzBhFRkYqMjJS69atkyT99re/VUREhHbs2KEFCxYoIiJCq1evtvTcNgaESC3+/ve/66WXXtKGDRt09uxZde3aVS+99JJt+8svv6yf/OQnKigo0P33368NGzZIkhYtWqTLly/rxIkT+s///E/NnTtXH374oaSal3EOHz6sM2fOqFu3blq/fr327NkjSXrppZe0cOFCnT17VqNHj9ayZcuMa7906ZImTJig1atX69y5cxo6dKh+9rOfSar5y/Hll1/WkSNHdPr0afn5+enTTz+tc7wrV65o1KhRSklJUV5enhISEjR27FjbL/9bb72lrl276ty5c5o4caJWrVpludaCggKtWbNGaWlpkqQ9e/boN7/5jT744AP993//t3JychyOERISoh49etj+U8zMzNTAgQPVpk2bOu+XmpqqNm3a6NSpU9q9e7cWLlyogoICxcbG6s9//rOOHz+uhIQE/fnPf9apU6fUu3fvWse67bbblJeXp23btikuLk55eXm290Py8/OVnJysdevW6cyZM/re976nmTNn3nD/2bNna+zYsZZCsy579+7VoUOHdPLkSR0/flxnz55VYWGhJGn69Olq3769zpw5o1/+8peaNGmSCgoK6hzv008/1cGDB5WTk6NTp07p2LFjysrKUnh4uPLy8rRixQqNGjVKeXl5euONNxzW9+abb+qVV17Rvn37lJOTowMHDtj+M5akF154Qb/+9a91+vRp5efnKzMzs87x5s6dq7y8PMXFxWnbtm3Ky8vTuHHjJNX9uyhJf/rTn5SVlaXf/e53evzxxyXV/DG3Zs0aZWZm6rPPPlN5ebnt937r1q0qKSnRmTNn9P777yszM1N/+9vfNG7cOOXl5WnUqFFasWKF8vLyNHfuXIfPRWNBiNSiefPm2rJliz755BPNmDFDO3bs0IULF2zbH3roIdtf03379tXly5cl1Zy9PPXUU7r99tvVuXNnPfroo3r77bclST/60Y90++2364477tCAAQMUFBSkqqoqSdL69etVUlKi2bNna+PGjTccq76ys7NVVFSksWPHKioqSmvXrtVnn30mSYqIiFBwcLDmzZun119/XWlpaerZs2ed4x06dEjBwcF69NFHJUkpKSn6y1/+Yju7+eEPf6iZM2eqWbNmio+Ptz0Xjly9elVTpkzRE088oT59+kiS9u/frxEjRqh9+/a68847lZycbGms8ePH215W2blzp6U3nHfv3q2tW7eqc+fOGjx4sKqqqnTixAn17t1bJ0+e1MmTJ5WUlKRjx47p/PnzuueeeyzVcrO9e/cqNjZWffr0kZ+fnxYsWKC9e/fa/sKVap7TpKQkh8HnSPfu3VVSUqLU1FT913/9l1588UWFhoaqrKxMWVlZmj9/vvz8/NS3b1/16tVLe/furXO8e+65RytXrtTLL7+s5ORkffbZZw362Xz77bc1efJktW/fXq1atdKsWbP01ltv2bY/+eSTio6O1ne+8x3dd999unTpkvGx6vpdlKSAgAC98sorioqKUkBAgKSan4nz58+rb9++iomJ0aFDh3TixAlJUmxsrE6dOqVFixbp8OHD2rZtm26//Xbj+hoLQqQW+fn56t27t0pKSpScnKxf/OIXN2yPjo62fd2sWbMbtt18+zo/Pz+7X5eUlKh37946c+aMRo4cWa+/5O2prq5Wp06dlJeXZ/u4/hduUFCQjh49qvHjx6ugoEDx8fF6//33HY5Z22OS6n4u6qpx9uzZCgoK0uLFi23fr6qqumGM2267zdJ4jz76qN5991198803ysnJ0dChQy3d76233rI9R6dPn9aDDz5o+8/i8uXLuv/++7Vr1y516dJFLVq0sDTmzaqrqx0+Lw888IDR2DcLDw/XJ598ooceekjHjh1Tz549dfbsWVVX1yzWbfXf57rf/e53Gjt2rNq1a6fU1FQ99thjDarP0XNh8rNUl7rG6NOnj5o3v3FuUXV1tSZNmmT7mThz5oztJefY2Fh9+OGHiouLU1ZWlnr27NmgkGssCJFafPzxxwoMDNSTTz6pyMjIW07Vvx0C35aUlKRf/vKXunr1qnJzc/X73//e7syVbzt79qyKi4s1d+5c9ejRQ9u2bWtQ7X369FFRUZHtpYCtW7cqISFB0j/+0u/Tp48WLVqkmJgYh+9h9O3bVxcvXrT9pb9u3Tp16NBBnTp1klT7c1GXtLQ0HT16VFu2bLkhKB544AHt3r1bRUVFKikp0WuvvWZpvMDAQMXGxmr58uUaMmSI/P39Hd4nMTFRGzZsUGVlpS5cuKDu3bvr6NGjCg4Olp+fn1q0aKFOnTopKytLvXr1qvdjvO7HP/6xsrOz9eGHH6qqqkqrVq3SkCFD9N3vftd4zNpkZGRo1qxZSkhIUHp6uu688059+umnCgwMVEJCglavXq2qqiplZ2fryJEjGjJkSJ3jffDBB+rTp4+mTJmia9euKSsr64bt7dq1s70/VlpaqvLy8jrHS0pKUkZGhi5cuKCSkhJt2LBBjzzyiG27yc9SXceq7+/isGHDtHPnTuXn56uqqkopKSlatGiRpJr36lasWKHhw4frmWee0ddff61z587Z7tuuXTvb7S+//NJpj8PbESK1SEhIUJcuXRQVFaVHHnlEP/zhD3X69GmH91u2bJkCAwMVExNje430+ks1tbnnnns0cuRIdevWTQMHDrS93mzlDW97Wrdurddff11Lly5VZGSkXn31VWVkZEiS+vfvr/vvv1/du3dXZGSk/P39NWHChDrHa9WqlXbs2KFVq1apQ4cOyszM1BtvvGH5LOFmV65c0QsvvKDCwkL16NFDERERioiIUH5+vpKSkjR27FjFxsaqX79+6tKli+Vxx48fr9WrV1vunVi+fLmqq6sVExOjgQMHav78+bY3w3v37q3o6Gj5+/urQ4cOdb4f4khERIT+4z/+Q7NmzVLHjh3117/+VZs2bTIery6jR49WQECAoqOjFR0drfvuu08PPfSQJOnXv/61CgoK1LFjR82ePVubN2+2zWiqzeOPP66TJ08qLCxMaWlp6tWr1w2/B0OGDNFdd92lyMhIdevWzeFLXWPHjtXkyZM1cOBA9erVS3379lVKSkqDH7c9Jr+L/fr10+LFizV8+HB17NhRV65c0TPPPCOp5j2lc+fOKSoqSvfdd58mTpx4w3TeWbNm6d1331WHDh1uCMbGrhkXpQIAmOJMBABgjBABABgjRAAAxggRAIAxQgQAYIwQAQAYc8lS8MHBwWrXrp0rhgYAuNGXX35Z51JGLgmRdu3a2RZ9AwD4rpCQkDq383IWAMCY265sWF1dbfuA+zVr1sz2AQDO4pYQ+eqrr3Tx4kXbsufwDD8/P7Vp08ZjV34D0Pi4PESqqqpUVFSk0NBQ3XHHHa4+HOpQXl6uwsJCtW7d2qmrpQJoulweItdfvrrjjjuMV32Fc1wPcV5SBOAs/Dlah9GjRztlH2dYs2aNpaXoAcCdCBEXSU9P1/Hjx5023lNPPaXOnTvXut1dYQYA3+a22VlWFRRIYWH1v9/UqVMVERGho0ePatmyZcrLy1NGRoZatGihl19+WcuWLVN5ebmqq6v18MMPa/ny5SopKdH3vvc9paWlKSMjQ1999ZW6du2qhQsX1nmsiooKTZw4UW3atNE333yjiooKTZgwQc2bN9eMGTNsV4ArLCzUtGnT1KNHjxu233w1ufT0dJ09e1bXrl3Ts88+q4sXL2r58uXy8/PTiy++qHbt2ik9PV2jR49Wt27dlJSUpLi4OP3pT3/Sa6+9plWrVun48eNaunSpFi9erO3bt2v37t1q1qyZtmzZUv8nEwAs8pozkcpKKS1NCg+v+WxyUb8xY8Zo6tSpOnLkiF599VVt375dDz/8sN5++21JNVeYW7lypSRp0KBBCg4O1sSJE3X27Fk9/PDDeuCBB2yXlK1LZmamHnnkEdulN6urqzVt2jR16dJFe/bsUWJiogYPHqynnnpKffv2vWW7Pf/0T/+k1NRUvfbaa1qzZo22bNmin/3sZ/rNb35zy77l5eWaPXu2EhMTdfLkST333HPq1q2b7Vrl+fn5iouL09y5c+v/JAJAPXhNiFRUSEuX1ny9dGnN7foKCwtTixYtVFVVdUM/xPWv+/XrZ/teQECAAgIC/v/YFVqxYoV69OihH/zgBw6PU11drebNm6t58+by8/NTTk6Odu3apWHDhunatWuSpNtuu802pdne9ptdvXpVlZWVt8yastfXceeddyooKMj2WG82YsQI9ezZU4sWLVJJSYnDxwMAprzm5aygICkhQXrnnZrPQUENG+/xxx/X+PHjbS9nnThxotZ9/f395e/vr82bN+vcuXMOxx4yZIhmzJih7Oxs+fn5qXXr1ioqKtKrr76qv//975KkwYMH6xe/+IWGDBmiAQMG3LL9Zps2bdLXX3+tF198UV999ZUmT54sPz8/vfTSS5Yeb+vWrTV58mQtW7ZMhw8f1uHDhxUQEKDvfOc7lu4PACZcco31zp0729bOunbtmk6fPq3OnTszxbcW336/w5X4twBQXyEhIcrNza11u9eciTRl6enpni4BaNQ+P1SgkL4GM3bgkNe8JwIAzlZZXqn9/dMUEh+u/f3TVFluMGMHdSJEADRaFcUVGniwZsbOwINLVVFsMGMHdSJEADRaQaFBOhqcIEk6GpygoNAGztjBLZpsiEydOlWlpaW22/PmzVN5ebkHKwLgCj2L90nV1TWf4XTeFyIFBS4Z1tGyIKtWrWKVYQCoJ+8JkQa2rB85ckRjxozRuHHjNGjQIEnSiy++qKysLC1cuNC2LEhtvn1msnHjRs2cOVMzZsywbVuyZIlWrFghSUpNTdX06dP17//+7yaPFAAaDe+Z4ntzy/q8eVKLFpbvfn2pkGPHjik+Pt72/ebNm+u5557TmTNnbMuCOLJp0yYNGTJEubm5to7v2NhYDR06VJL0xRdf6NFHH1WPHj0s1wcAjZH3nIlcb1mXGtyyfr1LOz8/3+j+ISEheu655zRlyhRbU963l0yZM2eObr/9dqWmphrXCACNgfeciUjSPvM3vp566inbUiELFy5UamqqTp48qYcffljSjcuChISE1DnW0KFDNWHCBLVs2VJJSUm3bN++fbtKSkrUoUMH43oBoDFg2ZMmhH8LAPXlaNkT73k5CwDgc1weIteXMue63p53/d/A3vLyAGDC5e+J+Pn5qUWLFrp48aLatGnDf2AeUl1drYsXL6pFixa3XLMEAEy55Y31sLAwFRQUqLi42B2HQy1atGihMJNrDwNALdwSIv7+/urYsaOqqqp4WctDmjVrxhkIAKdz6xRf/hMD4EwFBRIn157F/+oAfE4DV0mCExEiAHzOzaskVXCZEI8hRAD4HCeukoQG8q5lTwDAogaskgQn4kwEAGCMEAEAGCNEAHjU54dcczVTuAchAsAjKssrtb9/mkLiw7W/f5oqy5mn64sIEQAeUVFcoYEHa+bpDjy4VBXFzNP1RYQIAI8ICg3S0eCaebpHgxMUFMo8XV/EFF8AHtOzuGaebk8P1wFznIkAAIwRIgAAY/UKkSlTpigjI8NVtQAAfIzlENmxY4f+8Ic/uLIWAHALelOcx1KIfPHFF1qzZo2eeOIJV9cDAC5Db4rzWQqRlJQULV++XC1btrS7fe3aterUqZPto7S01KlFAoAz0JvifA5DZPPmzYqOjlZ8fHyt+8yZM0e5ubm2j8DAQKcWCQDOQG+K8znsE9m5c6euXLmixMRE5efnKyAgQK1atVJSUpI76gMAp6I3xbkchsiOHTtsXz/77LMKCwsjQAAAkurZsf7000+7qg4AgA+i2RAAYIwQAeATDh3ydAWwhxAB4NXKy6X+/aX4+JrP5eWergjfRogA8GrFxdLBgzVfHzxYcxvegxAB4NVCQ6Xg4Jqvg4NrbsN7cD0RAF6Psw/vxZkIAMAYIQIAMEaIAHCrt95y/zELWPndZQgRAG5x5YoUFiY99ljN5ytXXH/MykopLU0KD6/5XMnK705HiABwi7/+VTp/vubr8+drbrtaRYW0tGbldy1dWnMbzkWIAHCLmBip+f/PB23evOa2qwUFSQk1K78rIaHmNpyLKb4A3MYTLyft2+f+YzYlnIkAAIwRIgAAY4QIAMAYIQLAraz0bNjbh14P70SIAHALKz0b9vah18O7NSsrK6t29qCdO3dWYWGhs4cF4MNKSqRWrf5x+8qVW6fc2ttHcnw/uE5ISIhyc3Nr3c6ZCAC3sNKzYW8fej28G30iANzGSs+GvX3o9fBenIkAAIwRIgAAY4QIAMAYIQLApiG9GPR2NE2ECIAG9WLQ29G00ScCwFIPR33uK9Hb0VjQJwLAoYb0YtDb0bTRJwJAUsN6MejtaLo4EwEAGCNEAADGCBEALnPokOPvOXtaMdyLEAHgdOXlUv/+Unx8zefy8lu/d+WKc6cVwzOY4gvA6QoLpbvv/sft8+drPn/7eydPSl27/uN2Q6cVMwPMNZjiC8DtQkOl4OCar4ODa27f/L2YGOdOK4ZnMMUXgEsUFzv+nrOnFcP9OBMBABgjRAAAxggRAIAxQgSAjdW+C3v9H1b3u/kYzj6mM31+iEYURwgRAJb7Luz1f1jd7+ZjlJc795jOVFleqf390xQSH679/dNUWU4jSm3oEwFgue/CXv9HaKi1/YKCbjzG+fM37tPQYzpTSWGJgu7+R7El568oKLRpziOmTwSAQ1b7Luz1f1jd7+ZjhIY695jOFBQapKPBNcUdDU5osgFiBWciAIBacSYCAHAZQgQAYIwQAeAy9qbvNpXl25vK9GBCBIDT2Zsy3FSWb29q04N5Yx2A09mbMiw1jeXbG9v0YN5YB+B29qYMN5Xl25va9GDORAAAteJMBADgMoQIAMAYIQIAMEaIAE2Elf4Me/s0ZAl2X+oJcXVfhyeeC3f0qhAiQCNnpT/D3j4NWYLdl3pCXN3X4Ynnwp29KszOAho5K8u829unpMR8CXarS8t7A1f3dXjiuXDmY2J2FtDEWenPsLdPQ5Zg96WeEFf3dXjiuXBnrwpnIgCAWnEmAgBwGUIEAGCMEAEAGCNEANhY7WVwdj8J6mb1ufXENUwIEQCWexmc3U+Cull9bj15DRNmZwGw3Mvg7H4S1K2w0Npz68peF2ZnAXDIai+Ds/tJUDerz60nr2HCmQgAoFZOORMpKyvTO++8oxMnTjitMACA73MYIhUVFRo+fLiys7O1YMECrV+/3h11AQB8gMMQOX78uObPn6+f//znWr58uXbv3u2OugA0gL2pns5citzelFN74zvzmFbHslqb6fiuXtLd3viemLprlcMQ6d27t4YNG6bTp09r5cqVGj9+vDvqAmDA3lRPZy5Fbm/Kqb3xnXlMq2NZrc10fFcv6W73efTg1F2rLL+xvn79em3evFlLly7Vj3/84xu2rV27VuvWrbPdLisr0+XLl51aKADH7E31VFCQ05YitzflNCjo1mm/kvOWP7c6/dhqbVaWwbc6vdmZK/LaG18lrl2m3gqnTfGdNWuWtm7dqsWLF9+ybc6cOcrNzbV9BAYGmlULoEHsTfV05lLk9qac2hvfmce0OpbV2kzHd/WS7nafRw9O3bXK4ZnIxo0bdeHCBaWlpSknJ0dpaWnas2dPnYMyxRcAGgdHZyLNHQ2QnJysadOmadCgQfL399fzzz/v1AIBAL7LYYjccccd2r59uztqAQD4GJY9AQAYI0SAJsLV/Q3ewpn9KlbH8qU+FGcjRIBGztX9Dd7Cmf0qVsfypT4UV2EBRqCRc3V/g7ew22chs8dudSwr43tLH4oploIHmjhX9zd4C2f2q1gdy5f6UFyFMxEAQK04EwEAuAwhAgAwRogAAIwRIgAaPXvXGPGGsRrSE+It/SSECIBGy941RrxhrIb0hHhbPwmzswA0WvauMRIa6vmxGtIT4u5+EmZnAWiy7F1jxBvGakhPiLf1kzhcxRcAfFlxsXeOtW+fZ+7rbJyJAACMESIAAGOECACPcvZUVSvjWZ2m+/khL5lH68UIEQAe4eypqlbGszpNt7K8Uvv7pykkPlz7+6epstxH1mX3AKb4AvAIZ09VtTKe1Wm6JYUlCrr7H4OVnL+ioFAfWVbXyZjiC8ArOXuqqpXxrE7TDQoN0tHgmsGOBic02QCxgjMRAECtOBMBALgMIQIAMEaIAACMESIAfJbVHpOb9/OWZdQbA0IEgM+x2mNy837l5d61jHpjwOwsAD7Hao/JzfudP39jn4irl1FvDJidBaDRsdpjcvN+oaHetYx6Y8BS8AB8ktXl0G/ez5uWUW8MOBMBABgjRAAAxggRAIAxQgSAWznzGh32+j28pQfEW+pwNUIEgFs48xod9vpEnH19EmfW1pjRJwLALZx5jQ57fSKSc69PYsrZ10nxNPpEAHgFZ16jw16fiLOvT+LM2hozzkQAALXiTAQA4DKECADAGCECwCmcOXXXqkOH3H5I3IQQAdAgzpy6a1V5udS/vxQfX/O5vNzlh0QteGMdQIM4c+quVYWFNy7pfv58zQq9cD7eWAfgUs6cumtVaKgUHFzzdXAwAeJJLAUPoMF6Ftesr97TjccsLnbjwVArzkQAAMYIEQCAMUIEAGCMEAGaMFf3drh6OXRvXm7dm2tzJkIEaIJc3dvh6uXQvXm5dW+uzRXoEwGaIFf3drh6OXRvXm7dm2szQZ8IgFu4urfD1cuhe/Ny695cmytwJgIAqBVnIgAAlyFEAADGCBEAqIXVKdCeWAbfWxAiAHATq1OgPbEMvrfhjXUAuInVKdCeWAbf3XhjHQDqyeoUaE8sg+9tWAoeAOywury9J5bB9yaciQAAjBEiAABjhAgAwBghAgAwRogAAIwRIgAAY4QIAMAYIQIAMEaIAACMOQyR0tJSjRkzRiNGjFC/fv300UcfuaMuAIAPcBgi27dv15gxY7Rr1y6lp6dryZIl7qgLAOADHK6dNX36dNvXFy5cUPv27V1aEIDG4/NDBQrpG+bpMuBClt8TKSoq0sqVK5WamurKegA0Alxno+mwdD2Rq1ev6pFHHtH06dP12GOP3bJ97dq1Wrdune12WVmZLl++7NRCAfiOpnCdjabC0fVEHIbItWvXNGnSJMXFxSklJcXSQbkoFYCjrQep56V3dDQ4wbZcOnyPoxBx+J7I5s2btXfvXl26dEl//OMf1bZtW23dutWpRQJofJr6dTaaCi6PCwCoFZfHBQC4DCECADBGiAAAjBEiAABjhAgAwBghAgAwRogAAIwRIgAAY4QIAMAYIQIAMEaIAACMESIAAGOECADAGCECADBGiAAAjBEiAABjhAgAwBghAgAwRogAAIwRIgAAY4QIAMAYIQIAMEaIAACMESIAAGOECADAGCECADBGiAAAjBEiAABjhAgAwBghAgAwRogAAIwRIgAAY4QIAMAYIQIAMEaIAACMESIAAGOECADAGCECADBGiAAAjBEiAABjhAgAwBghAgAwRogAAIwRIgAAY4QIAMAYIQIAMEaIAACMESIAAGOECADAGCECADBGiAAAjBEiAABjhAgAwBghAgAwRogAAIwRIgAAY4QIAMAYIQIAMEaIAACMESIAAGOECADAGCECADBGiAAAjBEiAABjhAgAwBghAgAwRogAAIwRIgAAY4QIAMCY5RCprKzUyJEjdeDAAVfWAwDwIZZC5OrVqxo9erTOnz/v6noajc8PFXi6BABwOctnIuvXr1fPnj1dWUujUFleqf390xQSH679/dNUWV7p6ZIAwGUshYi/v79CQkJq3b527Vp16tTJ9lFaWuq0An1NRXGFBh5cKkkaeHCpKoorPFwRALiOU95YnzNnjnJzc20fgYGBzhjWJwWFBulocIIk6WhwgoJCgzxcEQC4TnNPF9AY9SzeV/PZw3UAgKsxxRcAYKxeZyKbNm1yVR0AAB/EmQgAwBghAgAwRogAAIwRIgAAY4QIAMAYIQIAMEaIAACMESIAAGOECADAGCECADBGiAAAjBEiAABjhAgAwBghAgAwRogAAIwRIgAAY4QIAMAYIQIAMEaIAACMESIAAGOECADAGCECADBGiAAAjBEiAABjhAgAwBghAgAwRogAAIwRIgAAY4QIAMAYIQIAMEaIAACMESIAAGOECADAGCECADBGiAAAjBEiAABjhAgAwBghAgAwRogAAIwRIgAAY4QIAMAYIQIAMEaIAACMESIAAGOECADAGCECADBGiAAAjBEiAABjhAgAwBghAgAwRogAAIwRIgAAY4QIAMAYIQIAMEaIAACMESIAAGOECADAGCECADBGiAAAjBEiAABjhAgAwBghAgAwRogAAIwRIgAAY4QIAMAYIQIAMEaIAACMESIAAGOECADAmKUQSU9PV3x8vEaOHKmioiJX1yRJ+vxQgVuOAwAw5zBE9u7dq+zsbL333ntKSUnRkiVLXFpQZXml9vdPU0h8uPb3T1NleaVLjwcAMOcwRLKysjRu3Dj5+flpwIABys7OvmWftWvXqlOnTraP0tJS44Iqiis08OBSSdLAg0tVUVxhPBYAwLUchkhpaalCQ0MlSc2aNVN5efkt+8yZM0e5ubm2j8DAQOOCgkKDdDQ4QZJ0NDhBQaFBxmMBAFyruaMdWrZseUNwfP311y4tSJJ6Fu+r+ezyIwEAGsLhmUhcXJz2798vSTp9+rTatm3r6poAAD7CYYgMGzZMOTk5+pd/+RclJydr9uzZ7qgLAOADHL6cFRAQoKysLGVmZmrChAnq1auXO+oCAPgAhyEi1QRJUlKSq2sBAPgYOtYBAMYIEQCAMUIEAGCMEAEAGCNEAADGCBEAgDFCBABgjBABABgjRAAAxggRAIAxS8ue1NeXX36pkJCQBo1RWlraoOuSeJqv1y/5/mOgfs/z9cdA/XJ4SfRmZWVl1Q06got06tRJubm5ni7DmK/XL/n+Y6B+z/P1x0D9jvFyFgDAGCECADDmtSHy5JNPerqEBvH1+iXffwzU73m+/hio3zGvfU8EAOD9vPZMBADg/QgRNEplZWV65513dOLECU+XAjRqhIiLVFZWauTIkTpw4ICnS6mX0tJSjRkzRiNGjFC/fv300UcfebqkequoqNDw4cOVnZ2tBQsWaP369Z4uydiUKVOUkZHh6TLq7Z577lFiYqISExO1ZMkST5djLCMjQ7NmzfJ0GfW2efNm2/OfmJiotm3b6n//939dciyXNBs2VHp6ujIzM/X9739fmzZtUrt27TxdUr1cvXpVY8aM0eeff+7pUupt+/btGjNmjMaOHat9+/ZpyZIl2rlzp6fLqpfjx49r/vz5GjZsmI4fP66FCxf65H8EO3bs0B/+8AcNHjzY06XUy9mzZ9W9e3e99tprni6lQfLy8rR27Vrt27fP06XU25QpUzRlyhRJUnZ2tjZs2KD27du75Fhedyayd+9eZWdn67333lNKSorP/hWzfv169ezZ09Nl1Nv06dM1duxYSdKFCxdc9oPnSr1799awYcN0+vRprVy5UuPHj/d0SfX2xRdfaM2aNXriiSc8XUq9vffee/r44481aNAgPfjggz55NltVVaVp06apa9eu2rZtm4qLiz1dkrF//dd/1dKlS102vteFSFZWlsaNGyc/Pz8NGDBA2dnZni6p3vz9/Ru87IunFRUVaeXKlUpNTfV0KcaysrJ06tQpnzuTlaSUlBQtX75cLVu29HQp9da9e3ft3LlT+/btU3p6uhYtWuTpkurt9ddfV1VVlZ555hnFxMQoMTFRVVVVni6r3t5991117NhRoaGhLjuG14VIaWmp7QE3a9ZM5eXlHq6o6bl69aomT56sxYsXKzIy0tPlGJs1a5a2bt2qxYsXe7qUetm8ebOio6MVHx/v6VKMxMTEKCoqSpLUo0cPnTp1ysMV1d9HH32kn/70p7r77rs1YMAABQQE6OzZs54uq95+9atfufxs1uveE2nZsuUNwfH11197sJqm59q1a5oyZYoSExP12GOPebocIxs3btSFCxeUlpamS5cuKTg42NMl1cvOnTt15coVJSYmKj8/XwEBAWrVqpWSkpI8XZolM2fO1KRJkzRkyBDt2LHDJ1/W7dKli23NqaKiIhUWFuquu+7ycFX1U1RUpLy8PPXo0cOlx/G6EImLi9P+/fuVlJSk06dPq23btp4uqUnZvHmz9u7dq0uXLumPf/yj2rZtq61bt3q6rHpJTk7WtGnTNGjQIPn7++v555/3dEn1smPHDtvXzz77rMLCwnwmQCRp8eLFmjJlihYtWqSQkBCtXbvW0yXVW3JyslJSUvTQQw+pqKhI//Zv/6bvfve7ni6rXjIzMzVgwACXH8frOta/+eYbDR48WLGxsXr//fc1c+ZM/fSnP/V0WQAAO7wuRKSaIMnMzNRdd92lXr16ebocAEAtvDJEAAC+wetmZwEAfAchAgAwRogAAIwRIgAAY4QIAMAYIQIAMPZ/uJPwXNBlTBQAAAAASUVORK5CYII=",
      "text/plain": [
       "<Figure size 480x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib as mpl\n",
    "from matplotlib import pyplot as plt\n",
    "\n",
    "plt.rcParams[\"font.sans-serif\"] = \"SimHei\"\n",
    "plt.rcParams[\"axes.unicode_minus\"] = False\n",
    "\n",
    "fig = plt.figure(figsize=(6,6),dpi=80,facecolor=\"whitesmoke\", edgecolor=\"grey\")\n",
    "ax = fig.add_subplot(1,1,1)\n",
    "ax.scatter(data[:,0],data[:,1],marker='*',s=4.5,color='blue',label=\"normal data points\")\n",
    "ax.scatter(anomalies[:,0],anomalies[:,1],marker='*',s=4.5,color='red',label=\"outliers\")\n",
    "ax.set_title(\"anomalies on 2d view for isolation forest\")\n",
    "ax.legend(loc=\"upper left\", fontsize=7)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f1c7a42c-8d39-4e47-84c7-30a6b8ed2c08",
   "metadata": {},
   "source": [
    "## 其他 - 简单欧式距离 \n",
    "\n",
    "计算每个点到其余所有点的距离之和，这个和值明显过大的即为离群点"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 152,
   "id": "f79ce2df-19bf-45fe-9820-8d4711fb9bb5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0., 0., 0., 0.],\n",
       "       [1., 1., 1., 1.]])"
      ]
     },
     "execution_count": 152,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "distances = np.array([np.sum(np.linalg.norm(x-data)) for x in data] )\n",
    "mean_distances = np.mean(distances)\n",
    "std_distances = np.std(distances)\n",
    "anomalies = np.array([data[i] for i,d in enumerate(distances) if d > mean_distances +3*std_distances],dtype=np.float64)\n",
    "anomalies"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5b512b48-94ca-4374-8a55-bedba552833d",
   "metadata": {},
   "source": [
    "## 残差分析\n",
    "\n",
    "### 线性回归模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 159,
   "id": "dac90795-bb1c-4604-8410-29c28c443ae6",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "异常点的索引： [4]\n"
     ]
    }
   ],
   "source": [
    "#假设我们使用线性回归模型来进行拟合，然后计算残差并进行后续的异常点检测。\n",
    "#可以根据数据特点选择对应的模型，\n",
    "#例如：多项式回归、决策树回归、支持向量机回归等\n",
    "\n",
    "import numpy as np\n",
    "from sklearn.linear_model import LinearRegression\n",
    "from scipy.stats import norm\n",
    "\n",
    "\n",
    "def detect_anomalies_with_lr(data): \n",
    "    X = np.arange(len(data)).reshape(-1, 1) \n",
    "    y = data\n",
    "    model = LinearRegression() \n",
    "    model.fit(X, y)\n",
    "    # 计算残差 \n",
    "    residuals = y - model.predict(X)\n",
    "    # 计算残差的均值和标准差 \n",
    "    mean_residual = np.mean(residuals) \n",
    "    std_residual = np.std(residuals)\n",
    "    # 计算残差的概率 \n",
    "    probabilities = norm.pdf(residuals, mean_residual, std_residual)\n",
    "    # 设定阈值，例如 0.01 \n",
    "    threshold = 0.01\n",
    "    # 识别异常点 \n",
    "    anomalies = [i for i, p in enumerate(probabilities) if p < threshold]\n",
    "    return anomalies\n",
    "    \n",
    "# 示例用法\n",
    "data = np.array([1, 2, 3, 4, 100, 5, 6, 7, 8, 9])\n",
    "anomalies = detect_anomalies_with_lr(data)\n",
    "print(\"异常点的索引：\", anomalies)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "574be8c8-6390-4fdc-9235-58981730841c",
   "metadata": {},
   "source": [
    "### 线性插值模型\n",
    "\n",
    "1. np.ployfit(x,y,deg)将会用LSM进行相关deg的拟合\n",
    "2. np.polyval(model,x) 将会使用model计算出相应x位置的y值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 168,
   "id": "54be864d-fb4e-4fd0-84ef-e39d40df0664",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[4]\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "def detect_anomalies_time_series(data): \n",
    "    mean = np.mean(data) \n",
    "    trend = np.polyfit(np.arange(len(data)), data, 1) \n",
    "    predicted = np.polyval(trend, np.arange(len(data))) \n",
    "    residuals = data - predicted \n",
    "    std_residual = np.std(residuals)\n",
    "    mean_residual = np.mean(residuals)\n",
    "    # 计算残差的概率 \n",
    "    probabilities = norm.pdf(residuals, mean_residual, std_residual)\n",
    "    # 设定阈值，例如 0.01 \n",
    "    threshold = 0.01\n",
    "    # 识别异常点 \n",
    "    anomalies = [i for i, p in enumerate(probabilities) if p < threshold]\n",
    "    return anomalies\n",
    "data = np.array([1, 2, 3, 4, 100, 5, 6, 7, 8, 9])\n",
    "print(detect_anomalies_time_series(data))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f2fdd1e2-35f9-4bef-9852-4737b25a5a18",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
